import pandas as pd

# Load the payment records.
data = pd.read_csv('payment.csv')

# Inspect the basic structure and summary statistics.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
data.info()
print(data.describe())
# Handle missing values: drop every row that contains at least one NaN.
data = data.dropna()

# Handle outliers: discard amounts above the 99th percentile.
# The comparison is inclusive so rows sitting exactly on the threshold are
# kept. With a strict "<", tied top amounts (or a column where every amount
# is identical) would all be removed, possibly leaving nothing to analyse.
amount_cap = data['amount'].quantile(0.99)
data = data[data['amount'] <= amount_cap]
import matplotlib.pyplot as plt

# Visualise the distribution of payment amounts as a 50-bin histogram,
# drawing on the current axes and labelling it through the Axes API.
amount_ax = plt.gca()
amount_ax.hist(data['amount'], bins=50)
amount_ax.set_title('Payment Amount Distribution')
amount_ax.set_xlabel('Payment Amount')
amount_ax.set_ylabel('Frequency')
plt.show()
# Count how many payments were made with each payment method.
payment_methods = data['payment_method'].value_counts()
print(payment_methods)
# Visualise those counts as a bar chart, one bar per payment method; the
# Axes returned by the plot call is used for the title and axis labels.
method_ax = payment_methods.plot.bar()
method_ax.set_title('Payment Methods Distribution')
method_ax.set_xlabel('Payment Method')
method_ax.set_ylabel('Frequency')
plt.show()